###############################
#Parizek, Michal, and Matthew D. Stephen. 'The Increasing Representativeness of International Organizations' Secretariats: Evidence from the United Nations System, 1997-2015'. International Studies Quarterly, 2020.
#Statistical data and analysis script (supplementary material)
#Please read the article appendix and data analysis log for replication instructions


####################################################################
####################################################################
#System setup

##################
#Libraries

#install.packages("car")
library(car)
#install.packages("lmtest")
library(lmtest)
#install.packages("sandwich")
library(sandwich)
#install.packages("stargazer")
library(stargazer)
#install.packages("multiwayvcov")
library(multiwayvcov)
#install.packages("plm")
library(plm)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("gridExtra")
library(gridExtra)
#install.packages("tseries")
library(tseries)
#install.packages("scales")
library(scales)
#install.packages("sjPlot")
library(sjPlot)
#install.packages("tidyverse")
library(tidyverse)




##################
#Set path
rm(list = ls(all.names = TRUE))
dir="drive:/path"
setwd(dir)


####################################################################
####################################################################
#Load datasets

##################
#Main dataset
d9715<-read.table("representativeness_international_secretariats_main_data.txt",sep="\t", header=T)

##################
#Additional data for some figures and robustness analyses
igo_visibility_comparison<-read.table("representativeness_international_secretariats_igo_visibility_comparison.txt",sep="\t",header=T)
dadd<-read.table("representativeness_international_secretariats_additional_data.txt", header=T,sep=" ")
imfwtoy<-read.table("representativeness_international_secretariats_imf_wto.txt",sep="\t", header=T)



####################################################################
####################################################################
#Choice of IO visibility variable for robustness tests
#Only return to this choice later for setting different visibility variables (Table A11 and A12); no need to change 

##################
#Factiva - default, used in most analyses
d9715$visible<-d9715$visiblef

##################
#Google hits (for Table A11)
#when using these alternative measures, always load the main dataset anew, as there are different missing data values in different visibility variables
#d9715$visible<-d9715$visibleg

##################
#Alternative measures of broad political visibility/significance (for Table A12)
#When using these alternative measures, always load the main dataset anew, as there are different missing data values in different visibility variables
#Because of the way data are aggregated later on, the specific visibility variable must be changed here
#d9715$visible<-d9715$bigbudget #size of budget
#d9715$visible<-d9715$bigstaffriseabs #absolute rise in the number of staff over two decades
#d9715$visible<-d9715$operational #operational on-the-ground visibility


####################################################################
####################################################################
#Figure A1 and related preparation
#igo stands for IGO-level aggregation
#y in variable names stands for yearly data

##################
#IO-year aggregate
d9715igoy<-
  d9715 %>%
  group_by(year,igobodyabb) %>%
  summarize(staffnumberigototaly=sum(staffnumber,na.rm=T),
            gsstaffnumbernotinhqigototaly=mean(gsstaffnumbernotinhqigototal,na.rm=T),
            gsstaffnumberinhqigototaly=mean(gsstaffnumberinhqigototal,na.rm=T))
d9715igoy$gsstaffnumberigototaly<-d9715igoy$gsstaffnumbernotinhqigototaly+d9715igoy$gsstaffnumberinhqigototaly

##################
#Year aggregate
d9715y<-
  d9715igoy %>%
  group_by(year) %>%
  summarize(staffnumberigototaly=sum(staffnumberigototaly,na.rm=T),
            gsstaffnumbernotinhqigototaly=sum(gsstaffnumbernotinhqigototaly,na.rm=T),
            gsstaffnumberinhqigototaly=sum(gsstaffnumberinhqigototaly,na.rm=T))
d9715y$gsstaffnumberigototaly<-d9715y$gsstaffnumbernotinhqigototaly+d9715y$gsstaffnumberinhqigototaly


##################
#Figure A1 construction and export
jpeg(filename="fa1_un_staff_trends.jpg",quality = 100, width=800, height=600)
ggplot(d9715y,aes()) +
  geom_line(aes(y=staffnumberigototaly, x=year),size=2, linetype="solid")+
  geom_line(aes(y=gsstaffnumberigototaly, x=year),size=2, linetype="dashed")+
  geom_line(aes(y=gsstaffnumbernotinhqigototaly, x=year),size=1, linetype="dotted")+
  geom_line(aes(y=gsstaffnumberinhqigototaly, x=year),size=1, linetype="dotdash")+
  scale_x_continuous(name="Log staff number",
                     breaks=c(1997,2000,2005,2010,2015))+
  ggtitle("Trends in UN bodies staff size")+
  labs(y="Number of staff members",x="Year")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 22)
dev.off()



####################################################################
####################################################################
#Figure A2 and related calculations
#iso3 stands for country
#y stands for yearly data

##################
#Country-year aggregate
d9715iso3y<-
  d9715 %>%
  group_by(iso3,year) %>%
  summarize(staffnumber=sum(staffnumber,na.rm=T),
            population=mean(population, na.rm=T))

##################
#Country aggregate
d9715iso3<-
  d9715iso3y %>%
  group_by(iso3) %>%
  summarize(staffnumber=mean(staffnumber,na.rm=T),
            population=mean(population, na.rm=T))

d9715iso3$staffpermil<-d9715iso3$staffnumber/(d9715iso3$population/(10**6))
d9715iso3s<-subset(d9715iso3,d9715iso3$population>10**6 & d9715iso3$staffnumber>3)
d9715iso3s$logstaffnumber<-log10(d9715iso3s$staffnumber)
d9715iso3s$logstaffpermil<-log10(d9715iso3s$staffpermil)


##################
#Figure A2 construction and export
jpeg(filename="fa2_staff_representation_plot.jpg",quality = 100, width=800, height=600)
ggplot(d9715iso3s,aes(x=logstaffnumber, y=logstaffpermil)) +
  geom_point(alpha=0.2)+
  geom_text(label=d9715iso3s$iso3)+
  scale_size_continuous(range = c(0.5, 16))+
  theme_classic()+
  scale_x_continuous(name="Log staff number",
                     breaks=c(1,2,3),
                     labels=c(10,100,1000)
  )+
  scale_y_continuous(name="Log staff nubmer per mil. inhabitants",
                     breaks=c(0,1,2),
                     labels=c(1,10,100),
                     limits=c(-0.8,2)
  )+
  theme(axis.text=element_text(size=12),axis.title=element_text(size=14,face="bold"))
dev.off()



####################################################################
####################################################################
#Table A2 and related preparations

##################
#Data for initial and final time points, including missing data for UNOPS for 2015
d15<-subset(d9715,d9715$year==2015)
d15unops<-subset(d9715,d9715$igobodyabb=="UNOPS" & d9715$year==2014) #missing 2015 for UNOPS
d15<-rbind(d15,d15unops)
d15s<-d15[c("igobodyabb","iso3","staffnumberigototaly","riseabs", "riserel",
            "gsstaffnumberinhqigototal","gsstaffnumbernotinhqigototal",
            "outsidehqshare","factivahits01","googlehits01","gapihits17","regularbudget","visiblef","visibleg","visiblegapi")]
d15s$gsstaffnumbertotal<-d15s$gsstaffnumbernotinhqigototal+d15s$gsstaffnumberinhqigototal
d15s$outsidehqshare100<-100*d15s$outsidehqshare
d15s$riserel100<-100*d15s$riserel


##################
#Table A2 construction and export
igocharacteristics15<-
  d15s %>%
  group_by(igobodyabb) %>%
  summarize(staffnumberigototal=mean(staffnumberigototaly,na.rm=T),
            gsstaffnumbertotal=mean(gsstaffnumbertotal,na.rm=T),
            outsidehqshare=round(mean(outsidehqshare100,na.rm=T),1),
            riserel100=round(mean(riserel100,na.rm=T),1),
            factivahits01=mean(factivahits01,na.rm=T),
            visiblef=mean(visiblef,na.rm=T),
            googlehits01=mean(googlehits01,na.rm=T),
            visibleg=mean(visibleg,na.rm=T),
            gapihits17=mean(gapihits17,na.rm=T),
            visiblegapi=mean(visiblegapi,na.rm=T))
write.table(igocharacteristics15,file="ta2_igocharacteristics15.txt",row.names=F)


####################################################################
####################################################################
#Table A3 and related preparations

##################
#Data for initial and final period preparation
d15igo<-d15[c("igobodyabb","iso3","staffnumber","population")]
names(d15igo)<-c("igobodyabb","iso3","staffnumber15","population15")
d97<-subset(d9715,d9715$year==1997)
d97igo<-d97[c("igobodyabb","iso3","staffnumber","population")]
names(d97igo)<-c("igobodyabb","iso3","staffnumber97","population97")
d9715igo<-merge(d97igo,d15igo,by=c("igobodyabb","iso3"))

##################
#Country-level aggregation
d9715iso<-
  d9715igo %>%
  group_by(iso3) %>%
  summarize(staffnumber15=sum(staffnumber15,na.rm=T),
            staffnumber97=sum(staffnumber97,na.rm=T),
            population15=mean(population15,na.rm=T),
            population97=mean(population97,na.rm=T))

##################
#Variable calculation
d9715iso$staffpermil15<-round(d9715iso$staffnumber15/(d9715iso$population15/1000000),2)
d9715iso$staffpermil97<-round(d9715iso$staffnumber97/(d9715iso$population97/1000000),2)
d9715iso$staffshare15<-d9715iso$staffnumber15/sum(d9715iso$staffnumber15,na.rm=T)
d9715iso$staffshare97<-d9715iso$staffnumber97/sum(d9715iso$staffnumber97,na.rm=T)
d9715iso$staffchange<-d9715iso$staffnumber15-d9715iso$staffnumber97
d9715iso$staffsharechangeabs<-d9715iso$staffshare15-d9715iso$staffshare97
d9715iso$staffshare15_100<-round(100*(d9715iso$staffshare15),2)
d9715iso$staffshare97_100<-round(100*(d9715iso$staffshare97),2)
d9715iso$staffsharechangerel<-((d9715iso$staffshare15_100/d9715iso$staffshare97_100)-1)*100
d9715iso2<-d9715iso[order(-d9715iso$staffnumber15),] 

##################
#Table A3 creation and export
d9715iso2<-d9715iso2[c("iso3","staffnumber15","staffnumber97","staffpermil15","staffpermil97","staffshare15_100","staffshare97_100","staffchange")]
write.table(d9715iso2,file="ta3_country_representation.txt")



####################################################################
####################################################################
#Figure A7

##################
#IGO-year aggregation
igoy_size<-
  d9715 %>%
  group_by(igobodyabb,year) %>%
  summarize(staffnumberigototal=mean(staffnumberigototaly,na.rm=T),
            factivahits01=mean(factivahits01,na.rm=T),
            googlehits01=mean(googlehits01,na.rm=T),
            gapihits17=mean(gapihits17,na.rm=T),
            regularbudget=mean(regularbudget,na.rm=T),
            riserel=mean(riserel,na.rm=T),
            visibleg=mean(visibleg,na.rm=T),
            visiblef=mean(visiblef,na.rm=T),
            visiblegapi=mean(visiblegapi,na.rm=T),
            operational=mean(operational,na.rm=T),
            bigbudget=mean(bigbudget,na.rm=T),
            bigstaff=mean(bigstaff,na.rm=T))

##################
#IGO aggregation
igo_size<-
  d9715 %>%
  group_by(igobodyabb) %>%
  summarize(staffnumberigototal=mean(staffnumberigototaly,na.rm=T),
            factivahits01=mean(factivahits01,na.rm=T),
            googlehits01=mean(googlehits01,na.rm=T),
            gapihits17=mean(gapihits17,na.rm=T),
            regularbudget=mean(regularbudget,na.rm=T),
            riserel=mean(riserel,na.rm=T),
            visibleg=mean(visibleg,na.rm=T),
            visiblef=mean(visiblef,na.rm=T),
            visiblegapi=mean(visiblegapi,na.rm=T),
            operational=mean(operational,na.rm=T),
            bigbudget=mean(bigbudget,na.rm=T),
            bigstaff=mean(bigstaff,na.rm=T))


##################
#Yearly median scores
logstaffnumberigototalmed<-median(log10(igoy_size$staffnumberigototal),na.rm=T)
logfactivahits01med<-median(log10(igoy_size$factivahits01),na.rm=T)
logregularbudgetmed<-median(log10(igoy_size$regularbudget),na.rm=T)


##################
#Figure A7 construction and export
igosize1<-ggplot(igo_size, aes(x=log10(regularbudget), y=log10(staffnumberigototal))) +
  geom_text(aes(label=igobodyabb),hjust=0, vjust=0)+
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_vline(xintercept=logregularbudgetmed, linetype=1)+
  geom_hline(yintercept=logstaffnumberigototalmed, linetype=1)+
  labs(x="Regular IO budget (log)",y="Professional staff size (log)")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic()
igosize1

igosize2<-ggplot(igo_size, aes(x=log10(factivahits01), y=log10(staffnumberigototal))) +
  geom_text(aes(label=igobodyabb),hjust=0, vjust=0)+
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_hline(yintercept=logstaffnumberigototalmed, linetype=1)+
  geom_vline(xintercept=logfactivahits01med, linetype=1)+
  labs(x="FACTIVA hits (log)",y="Professional staff size (log)")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic()
igosize2

jpeg(filename="fa7_igosize.jpg",quality = 100, width=800, height=400)
grid.arrange(igosize1,igosize2, nrow = 1)
dev.off()



####################################################################
####################################################################
#Figure A6, Table A7


##################
#Figure A6 construction and export
igo_factiva_gapi<-ggplot(igo_visibility_comparison, aes(x=log10factivahits17, y=log10gapihits17)) +
  geom_text(aes(label=igobodyabb),hjust=0, vjust=0)+
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_vline(xintercept=median(log10(igo_visibility_comparison$factivahits17)))+
  geom_hline(yintercept=median(log10(igo_visibility_comparison$gapihits17)))+
  labs(x="Factiva hits (log)",y="GDELT API translingual hits (log)")+
  ggtitle("Visibility scores: Factiva and GDELT")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 22)
igo_factiva_gapi

igo_factiva_google<-ggplot(igo_visibility_comparison, aes(x=log10factivahits17, y=log10googlehits16)) +
  geom_text(aes(label=igobodyabb),hjust=0, vjust=0)+
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_vline(xintercept=median(log10(igo_visibility_comparison$factivahits17)))+
  geom_hline(yintercept=median(log10(igo_visibility_comparison$googlehits16)))+
  labs(x="Factiva hits (log)",y="Google hits (log)")+
  ggtitle("Visibility scores: Factiva and Google")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 22)
igo_factiva_google

jpeg(filename="fa6_igo_factiva_google.jpg",quality = 100, width=1200, height=600)
grid.arrange(igo_factiva_google,igo_factiva_gapi,nrow=1)
dev.off()

##################
#Table A7 regression model
#gapi stands for GDELT API as a data source
reg_factiva_google<-lm(log10factivahits17~log10googlehits16,data=igo_visibility_comparison)
summary(reg_factiva_google)
reg_factiva_gapi<-lm(log10factivahits17~log10gapihits17,data=igo_visibility_comparison)
summary(reg_factiva_gapi)

reg_factiva_gdelt<-stargazer(reg_factiva_google,reg_factiva_gapi,
                             covariate.labels = c("Google hits (log)","GDELT hits (log)"),
                             dep.var.labels=c("Factiva hits (log)"),
                             star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                             star.char = c("'", "*", "**","***"),
                             single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                             notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; standard errors in brackets",
                             notes.append = FALSE)
write.table(reg_factiva_gdelt, file="ta7_reg_factiva.html",col.names=F, row.names=FALSE,quote=FALSE)


####################################################################
####################################################################
#Figure A4 

##################
#Figure A4 construction and export
plot_pc_share_change_gnipc<-ggplot(dadd, aes(x=log10(gnipc), y=pc_share_change)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_text(aes(label=iso3),hjust=0, vjust=0,size=5)+
  geom_hline(yintercept=0)+
  labs(x="GNI per capita (log10)",y="Change in % of permanent contracts")+
  theme_classic(base_size = 15)
plot_pc_share_change_gnipc

plot_pc_share_change_gni<-ggplot(dadd, aes(x=log10(gni), y=pc_share_change)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  geom_text(aes(label=iso3),hjust=0, vjust=0, size=5)+
  geom_hline(yintercept=0)+
  labs(x="GNI (log10)",y="Change in % of permanent contracts")+
  theme_classic(base_size = 15)
plot_pc_share_change_gni

jpeg(filename="fa4_pc_share_change_gnipc_gni.jpg",quality = 100, width=1200, height=600)
grid.arrange(plot_pc_share_change_gnipc,plot_pc_share_change_gni,
             nrow = 1)
dev.off()


####################################################################
####################################################################
#Figure A3, Table A4
#w at the end, as in logunsecstaffnumberw, denotes weighted representation
#ws at the end, as in logunsecstaffnumberws, denotes weighted representation where only senior staff is counted

##################
#UN Secretariat seniority plots
unsec_seniority_plot1<-ggplot(dadd, aes(x=logunsecstaffnumber, y=logunsecstaffnumberw)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="Staff number count (log)",y="Grade-weighted staff number(log)")+
  ggtitle("UN")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
unsec_seniority_plot1

unsec_seniority_plot2<-ggplot(dadd, aes(x=logunsecstaffnumber, y=logunsecstaffnumberws)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="Staff number count (log)",y="Senior grade-weighted staff number (log)")+
  ggtitle("UN, senior staff only")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
unsec_seniority_plot2

##################
#WHO seniority plots
who_seniority_plot1<-ggplot(dadd, aes(x=logwhostaffnumber, y=logwhostaffnumberw)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="Staff number count (log)",y="Grade-weighted staff number(log)")+
  ggtitle("WHO")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
who_seniority_plot1

who_seniority_plot2<-ggplot(dadd, aes(x=logwhostaffnumber, y=logwhostaffnumberws)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="Staff number count (log)",y="Senior grade-weighted staff number (log)")+
  ggtitle("WHO, senior staff only")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
who_seniority_plot2


##################
#Figure A3 export
jpeg(filename="fa3_robust_hierarchy.jpg",quality = 100, width=1600, height=1200)
grid.arrange(unsec_seniority_plot1,unsec_seniority_plot2,who_seniority_plot1,who_seniority_plot2, 
             nrow = 2)
dev.off()



##################
#Seniority regression models for UN Secretariat and WHO
reg_unsec1<-lm(dadd$logunsecstaffnumberw~dadd$logunsecstaffnumber)
reg_unsec2<-lm(dadd$logunsecstaffnumberws~dadd$logunsecstaffnumber)
reg_who1<-lm(dadd$logwhostaffnumberw~dadd$logwhostaffnumber)
reg_who2<-lm(dadd$logwhostaffnumberws~dadd$logwhostaffnumber)

hierarchy<-stargazer(reg_unsec1,reg_unsec2,reg_who1,reg_who2,
                     covariate.labels = c("UN Sec. staff number (log)","WHO staff number (log)"),
                     dep.var.labels=c("UN weighted", "UN senior weighted",
                                      "WHO weighted", "WHO senior weighted"),
                     star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                     star.char = c("'", "*", "**","***"),
                     single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                     notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; standard errors in brackets",
                     notes.append = FALSE)
write.table(hierarchy, file="ta4_robust_hierarchy.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#Figure A5 construction and export


##################
#Budgetary contributions (assessed) and GNI plot
regbudget_plot<-ggplot(dadd, aes(x=loggni, y=logtotalun)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="GNI (log)",y="UN regular budget contr. (log)")+
  ggtitle("Regular budget, 2015")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
regbudget_plot

##################
#Budgetary contributions (voluntary) and GNI plot
voluntbudget_plot<-ggplot(dadd, aes(x=loggni, y=logvolunt)) +
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1,method=lm)+
  labs(x="GNI (log)",y="UN voluntary budget contr. (log)")+
  ggtitle("Voluntary contributions, 2015")+
  theme(legend.position="none",text = element_text(size=25))+
  theme_classic(base_size = 25)
voluntbudget_plot

jpeg(filename="fa5_budget_gni.jpg",quality = 100, width=1200, height=600)
grid.arrange(regbudget_plot,voluntbudget_plot, nrow = 1)
dev.off()

##################
#Budgetary contributions and GNI regression models
regbudget_reg<-lm(logtotalun~loggni,data=dadd)
voluntbudget_reg<-lm(logvolunt~loggni,data=dadd)

reg_budget<-stargazer(regbudget_reg,voluntbudget_reg,
                      covariate.labels = c("GNI (log)"),
                      dep.var.labels=c("UN regular budget contr. (log)","UN voluntary budget contr. (log)"),
                      star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                      star.char = c("'", "*", "**","***"),
                      single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                      notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; standard errors in brackets",
                      notes.append = FALSE)
write.table(reg_budget, file="ta6_reg_budget.html",col.names=F, row.names=FALSE,quote=FALSE)
#



####################################################################
####################################################################
#Figure 1 and related preparations

##################
#IGO-year aggregation
#digoy stands for data on IGO-year level
digoy<-
  d9715 %>%
  group_by(igobodyabb,year) %>%
  summarize(visible=mean(visible,na.rm=T),
            staffnumberoecd1994=sum(staffnumberoecd1994,na.rm=T),
            staffnumbernonoecd1994=sum(staffnumbernonoecd1994,na.rm=T),
            staffnumberioy=sum(staffnumber,na.rm=T),
            unlocalgsstaffiniso3=sum(unlocalgsstaffiniso3,na.rm=T),
            factivahits01=mean(factivahits01,na.rm=T),
            googlehits01=mean(googlehits01,na.rm=T),
            gsstaffnumberinhq=mean(gsstaffnumberinhq,na.rm=T),
            gsstaffnumbernotinhq=mean(gsstaffnumbernotinhq, na.rm=T))
digoy$staffoecdshare<-digoy$staffnumberoecd1994/(digoy$staffnumberoecd1994+digoy$staffnumbernonoecd1994)

##################
#Year aggregation
#staff numbers from OECD and non-OECD members
dy<-
  digoy %>%
  group_by(year) %>%
  summarize(staffnumberoecd1994=sum(staffnumberoecd1994,na.rm=T),
            staffnumbernonoecd1994=sum(staffnumbernonoecd1994,na.rm=T),
            staffnumber=sum(staffnumberioy,na.rm=T))

#overall figure for share of staff, across UN system, from year 1994 OECD members, for 1997 and 2015
oecdsh1997<-dy$staffnumberoecd1994[dy$year==1997]/dy$staffnumber[dy$year==1997]
oecdsh2015<-dy$staffnumberoecd1994[dy$year==2015]/dy$staffnumber[dy$year==2015]



##################
#Aggregates for Figure 1: OECD vs. non-OECD staff, across hihgly and lowly visible IOs
oecdy<-
  d9715 %>%
  group_by(oecdmember1994,year,visible) %>%
  summarize(staffnumber=sum(staffnumber, na.rm=T))

oecdy2<-
  oecdy %>%
  group_by(year,visible) %>%
  mutate(staffnumberysum=sum(staffnumber, na.rm=T))
oecdy2$groupstaffshare<-oecdy2$staffnumber/oecdy2$staffnumberysum

oecdy3<-
  oecdy %>%
  group_by(oecdmember1994,year) %>%
  summarize(staffnumber=sum(staffnumber, na.rm=T))

oecdy4<-
  oecdy3 %>%
  group_by(year) %>%
  mutate(staffnumberysum=sum(staffnumber, na.rm=T))
oecdy4$groupstaffshare<-oecdy4$staffnumber/oecdy4$staffnumberysum
oecdy4$visible<-2
oecdy5<-rbind(oecdy2,oecdy4)


##################
#Figure 1 (left): displays shares of staff from OECD members, for highly and lowly visible IOs (non-OECD members are the complement to 100%)
oecdyplot<- ggplot(subset(oecdy5,(oecdy5$oecdmember1994==1 & !is.na(oecdy5$visible))), aes(x=year, y=groupstaffshare,
                                                                colour=as.factor(-visible),
                                                                linetype = as.factor(-visible))) +
  geom_point(alpha=.3, show.legend = F) +
  geom_smooth(alpha=.2,method=lm, show.legend = F)+
  ylim(0.4,0.8)+
  ggtitle("UN System")+
  ylab("Share of staff from OECD countries")+
  xlab("")+
  theme_classic(base_size = 12)
oecdyplot

##################
#Figure 1 (right): displays shares of staff from OECD members, for IMF and WTO (non-OECD members are the complement to 100%)
imfwtoplot<- ggplot(imfwtoy, aes(x=year, y=staffnumberoecd1994share, colour=as.factor(igobodyabb),
                                                                linetype = as.factor(igobodyabb))) +
  geom_point(alpha=.3, show.legend = F) +
  geom_smooth(alpha=.2, size=1,method=lm, show.legend = F)+
  ylim(0.4,0.8)+
  ggtitle("IMF, WTO")+
  ylab("Share of staff from OECD countries")+
  xlab("")+
  theme_classic(base_size = 12)
imfwtoplot

jpeg(filename="fig1_oecdshare_regshare_time_grid.jpg", width=6.5, height=4, units="in", res=300)
grid.arrange(oecdyplot, imfwtoplot, nrow = 1)
dev.off()



####################################################################
####################################################################
#Data aggregates for models in Table 1

#iso3 refers to data being aggregated across countries
#y refers to data being aggregated over time
#x refers to data being aggregated across lowly and highly visible IOs
#hence d9715iso3yx is aggregation of d9715 across countries, over time, and across visibility levels

#exclude a small number of observations with missing values
d9715<-subset(d9715,!is.na(d9715$visible))


##################
#aggregate by country-year-IO visibility
d9715iso3yx<-
  d9715 %>%
  group_by(iso3,year,georeg,geosubreg,visible) %>%
  summarize(staffnumber=sum(staffnumber, na.rm=T),
            unlocalgsstaffiniso3=sum(unlocalgsstaffiniso3,na.rm=T),
            googlehits01yearlysum=sum(googlehits01,na.rm=T),
            factivahits01yearlysum=sum(factivahits01,na.rm=T),
            gni=mean(gni_currentusd,na.rm=T),
            unvoluntarybudgetarycontributions=mean(unvoluntarybudgetarycontributions,na.rm=T),
            population=mean(population,na.rm=T),
            tertiaryenrollmentint=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_eng=mean(worldlanguagefirst_eng,na.rm=T),
            worldlanguagefirst_eng_fre=mean(worldlanguagefirst_eng_fre,na.rm=T),
            governmenteffectiveness=mean(governmenteffectiveness,na.rm=T),
            polity=mean(polity,na.rm=T),
            gsstaffnumbernotinhq=mean(gsstaffnumbernotinhq,na.rm=T), 
            gsstaffnumberinhq=mean(gsstaffnumberinhq,na.rm=T),
            unfounding=mean(unfounding,na.rm=T),
            unfoundingsuccessor=mean(unfoundingsuccessor,na.rm=T),
            unp5=mean(unp5,na.rm=T),
            oecdmember1994=mean(oecdmember1994,na.rm=T),
            whostaffnumber=mean(whostaffnumber,na.rm=T),
            whostaffnumberw=mean(whostaffnumberw,na.rm=T),
            whostaffnumberws=mean(whostaffnumberws,na.rm=T),
            unsecstaffnumber=mean(unsecstaffnumber,na.rm=T),
            unsecstaffnumberw=mean(unsecstaffnumberw,na.rm=T),
            unsecstaffnumberws=mean(unsecstaffnumberws,na.rm=T))

##################
#generates aggregate number of staff members, in the given year, in all lowly visible and all highly visible IOs
d9715igox<-
  d9715 %>%
  group_by(year,visible) %>%
  summarize(staffnumber_vis_igosy=sum(staffnumber, na.rm=T))
d9715iso3yx<-merge(d9715iso3yx,d9715igox,by=c("visible","year"))

####################################################################
####################################################################
#Variables construction (logs)

##################
#Population, GNI, GNI pc
d9715iso3yx$logpopulation<-log10(d9715iso3yx$population)
d9715iso3yx$loggni<-log10(d9715iso3yx$gni)
d9715iso3yx$gnipc<-d9715iso3yx$gni/d9715iso3yx$population
d9715iso3yx$loggnipc<-log10(d9715iso3yx$gnipc)

##################
#Professional staff number
d9715iso3yx$logstaffnumber<-log10(d9715iso3yx$staffnumber)
d9715iso3yx$logstaffnumber[d9715iso3yx$logstaffnumber=='-Inf']<-NA

##################
#Budgetary contributions (voluntary)
d9715iso3yx$logunvoluntarybudgetarycontributions<-log10(d9715iso3yx$unvoluntarybudgetarycontributions)
d9715iso3yx$logunvoluntarybudgetarycontributions[d9715iso3yx$logunvoluntarybudgetarycontributions=='-Inf']<-0

##################
#Local operational activity (local/non-HQ/ general services staff)
d9715iso3yx$logunlocalgsstaffiniso3<-log10(d9715iso3yx$unlocalgsstaffiniso3)
d9715iso3yx$logunlocalgsstaffiniso3[d9715iso3yx$logunlocalgsstaffiniso3=="-Inf"]<-0

##################
#Time variables
d9715iso3yx$decade<- ifelse(d9715iso3yx$year< 2006,c(1),c(2)) 
d9715iso3yx$yearcount<-d9715iso3yx$year-1996


####################################################################
####################################################################
#Further aggegations

##################
#withouth visibility as a distinguishing factor (compare with d9715iso3yx above)
d9715iso3y<-
  d9715 %>%
  group_by(iso3,year,georeg,geosubreg) %>% 
  summarize(staffnumber=sum(staffnumber, na.rm=T),
            unlocalgsstaffiniso3=sum(unlocalgsstaffiniso3,na.rm=T),
            googlehits01yearlysum=sum(googlehits01,na.rm=T),
            factivahits01yearlysum=sum(factivahits01,na.rm=T),
            gni=mean(gni_currentusd,na.rm=T),
            unvoluntarybudgetarycontributions=mean(unvoluntarybudgetarycontributions,na.rm=T),
            population=mean(population,na.rm=T),
            tertiaryenrollmentint=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_eng=mean(worldlanguagefirst_eng,na.rm=T),
            worldlanguagefirst_eng_fre=mean(worldlanguagefirst_eng_fre,na.rm=T),
            governmenteffectiveness=mean(governmenteffectiveness,na.rm=T),
            polity=mean(polity,na.rm=T),
            gsstaffnumbernotinhq=mean(gsstaffnumbernotinhq,na.rm=T), 
            gsstaffnumberinhq=mean(gsstaffnumberinhq,na.rm=T),
            unfounding=mean(unfounding,na.rm=T),
            unfoundingsuccessor=mean(unfoundingsuccessor,na.rm=T),
            unp5=mean(unp5,na.rm=T),
            oecdmember1994=mean(oecdmember1994,na.rm=T),
            whostaffnumber=mean(whostaffnumber,na.rm=T),
            whostaffnumberw=mean(whostaffnumberw,na.rm=T),
            whostaffnumberws=mean(whostaffnumberws,na.rm=T),
            unsecstaffnumber=mean(unsecstaffnumber,na.rm=T),
            unsecstaffnumberw=mean(unsecstaffnumberw,na.rm=T),
            unsecstaffnumberws=mean(unsecstaffnumberws,na.rm=T))

##################
#generates aggregate number of staff members, in the given year (compare with d9715_igox above for lowly visible and highly visible IOs)
d9715igo<-
  d9715 %>%
  group_by(year) %>% 
  summarize(staffnumber_igosy=sum(staffnumber, na.rm=T))
d9715iso3y<-merge(d9715iso3y,d9715igo,by=c("year"))


####################################################################
####################################################################
#Variables construction (logs); analogous to construction above for aggregates across IO visibility levels

##################
#Population, GNI, GNI pc
d9715iso3y$logpopulation<-log10(d9715iso3y$population)
d9715iso3y$loggni<-log10(d9715iso3y$gni)
d9715iso3y$gnipc<-d9715iso3y$gni/d9715iso3y$population
d9715iso3y$loggnipc<-log10(d9715iso3y$gnipc)

##################
#Professional staff number
d9715iso3y$logstaffnumber<-log10(d9715iso3y$staffnumber)
d9715iso3y$logstaffnumber[d9715iso3y$logstaffnumber=='-Inf']<-NA

##################
#Budgetary contributions (voluntary)
d9715iso3y$logunvoluntarybudgetarycontributions<-log10(d9715iso3y$unvoluntarybudgetarycontributions)
d9715iso3y$logunvoluntarybudgetarycontributions[d9715iso3y$logunvoluntarybudgetarycontributions=='-Inf']<-0

##################
#Local operational activity (local/non-HQ/ general services staff)
d9715iso3y$logunlocalgsstaffiniso3<-log10(d9715iso3y$unlocalgsstaffiniso3)
d9715iso3y$logunlocalgsstaffiniso3[d9715iso3y$logunlocalgsstaffiniso3=="-Inf"]<-0

##################
#Time variables
d9715iso3y$decade<- ifelse(d9715iso3y$year< 2006,c(1),c(2)) 
d9715iso3y$yearcount<-d9715iso3y$year-1996

##################
#Explicitly set the order of dataframes (to make sure the Durbin-Watson test performs correctly later)
d9715iso3yx<-d9715iso3yx[order(d9715iso3yx$iso3,d9715iso3yx$yearcount),]
d9715iso3y<-d9715iso3y[order(d9715iso3y$iso3,d9715iso3y$yearcount),]


####################################################################
####################################################################
#Aggregate for country-level OLS models for Decade 1 (1997-2005)
#Followed by creation of standardized variables
#cs, as in d9705iso3cs, stands for cross-section, i.e. data for cross-sectional OLS models

##################
#Aggregation
d9705iso3y<-subset(d9715iso3y,d9715iso3y$decade==1)
d9705iso3cs<-
  d9705iso3y %>%
  group_by(iso3,georeg) %>% 
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))#,

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d9705iso3cs$logstaffnumberm<-log10(d9705iso3cs$staffnumberm+0.1)
d9705iso3cs$logunlocalgsstaffiniso3m<-log10(d9705iso3cs$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d9705iso3cs$logstaffnumbermstd<-c(scale(d9705iso3cs$logstaffnumberm))
d9705iso3cs$loggnim<-log10(d9705iso3cs$gnim)
d9705iso3cs$loggnimstd<-c(scale(d9705iso3cs$loggnim))
d9705iso3cs$logpopulationm<-log10(d9705iso3cs$populationm)
d9705iso3cs$logpopulationmstd<-c(scale(d9705iso3cs$logpopulationm))
d9705iso3cs$tertiaryenrollmentintmstd<-c(scale(d9705iso3cs$tertiaryenrollmentintm))
d9705iso3cs$logunlocalgsstaffiniso3m[d9705iso3cs$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d9705iso3cs$logunlocalgsstaffiniso3mstd<-c(scale(d9705iso3cs$logunlocalgsstaffiniso3m))
d9705iso3cs$politymstd<-c(scale(d9705iso3cs$politym))
d9705iso3cs$staffsharem<-d9705iso3cs$staffnumberm/sum(d9705iso3cs$staffnumberm,na.rm=T)
d9705iso3cs$logstaffsharem<-log10(d9705iso3cs$staffsharem)
d9705iso3cs$logstaffsharem[d9705iso3cs$logstaffsharem=="-Inf"]<-NA


####################################################################
####################################################################
#Aggregate for country-level OLS models for Decade 2 (2006-2015)
#Followed by creation of standardized variables

##################
#Aggregation
d0615iso3y<-subset(d9715iso3y,d9715iso3y$decade==2)
d0615iso3cs<-
  d0615iso3y %>%
  group_by(iso3,georeg) %>% 
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d0615iso3cs$logstaffnumberm<-log10(d0615iso3cs$staffnumberm+0.1)
d0615iso3cs$logunlocalgsstaffiniso3m<-log10(d0615iso3cs$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d0615iso3cs$logstaffnumberm[d0615iso3cs$logstaffnumberm=="-Inf"]<-NA
d0615iso3cs$logstaffnumbermstd<-c(scale(d0615iso3cs$logstaffnumberm))
d0615iso3cs$loggnim<-log10(d0615iso3cs$gnim)
d0615iso3cs$loggnimstd<-c(scale(d0615iso3cs$loggnim))
d0615iso3cs$logpopulationm<-log10(d0615iso3cs$populationm)
d0615iso3cs$logpopulationmstd<-c(scale(d0615iso3cs$logpopulationm))
d0615iso3cs$tertiaryenrollmentintmstd<-c(scale(d0615iso3cs$tertiaryenrollmentintm))
d0615iso3cs$logunlocalgsstaffiniso3m[d0615iso3cs$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d0615iso3cs$logunlocalgsstaffiniso3mstd<-c(scale(d0615iso3cs$logunlocalgsstaffiniso3m))
d0615iso3cs$politymstd<-c(scale(d0615iso3cs$politym))
d0615iso3cs$staffsharem<-d0615iso3cs$staffnumberm/sum(d0615iso3cs$staffnumberm,na.rm=T)
d0615iso3cs$logstaffsharem<-log10(d0615iso3cs$staffsharem)
d0615iso3cs$logstaffsharem[d0615iso3cs$logstaffsharem=="-Inf"]<-NA


####################################################################
####################################################################
#Aggregate for country-level OLS models, with separate observations for highly visible IOs combined and lowly visible IOs combined
#For Decade 1 (1997-2005)
#Followed by creation of standardized variables
#csx, as in d9705iso3csx, stands for data for cross-sectional OLS models, with two observations for each country - one for its representation in lowly visible IOs, one in highly visible IOs

##################
#Aggregation
d9705iso3yx<-subset(d9715iso3yx,d9715iso3yx$decade==1)
d9705iso3csx<-
  d9705iso3yx %>%
  group_by(iso3,georeg,visible) %>% 
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d9705iso3csx$logstaffnumberm<-log10(d9705iso3csx$staffnumberm+0.1)
d9705iso3csx$logunlocalgsstaffiniso3m<-log10(d9705iso3csx$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d9705iso3csx$logstaffnumberm[d9705iso3csx$logstaffnumberm=="-Inf"]<-NA
d9705iso3csx$logstaffnumbermstd<-c(scale(d9705iso3csx$logstaffnumberm))
d9705iso3csx$loggnim<-log10(d9705iso3csx$gnim)
d9705iso3csx$loggnimstd<-c(scale(d9705iso3csx$loggnim))
d9705iso3csx$logpopulationm<-log10(d9705iso3csx$populationm)
d9705iso3csx$logpopulationmstd<-c(scale(d9705iso3csx$logpopulationm))
d9705iso3csx$tertiaryenrollmentintmstd<-c(scale(d9705iso3csx$tertiaryenrollmentintm))
d9705iso3csx$logunlocalgsstaffiniso3m[d9705iso3csx$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d9705iso3csx$logunlocalgsstaffiniso3mstd<-c(scale(d9705iso3csx$logunlocalgsstaffiniso3m))
d9705iso3csx$politymstd<-c(scale(d9705iso3csx$politym))
d9705iso3csx$staffsharem<-d9705iso3csx$staffnumberm/sum(d9705iso3csx$staffnumberm,na.rm=T)
d9705iso3csx$logstaffsharem<-log10(d9705iso3csx$staffsharem)
d9705iso3csx$logstaffsharem[d9705iso3csx$logstaffsharem=="-Inf"]<-NA


####################################################################
####################################################################
#Aggregate for country-level OLS models, with separate observations for highly visible IOs combined and lowly visible IOs combined
#For Decade 2 (2006-2015)
#Followed by creation of standardized variables

##################
#Aggregation
d0615iso3yx<-subset(d9715iso3yx,d9715iso3yx$decade==2)
d0615iso3csx<-
  d0615iso3yx %>%
  group_by(iso3,georeg,visible) %>% 
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d0615iso3csx$logstaffnumberm<-log10(d0615iso3csx$staffnumberm+0.1)
d0615iso3csx$logunlocalgsstaffiniso3m<-log10(d0615iso3csx$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d0615iso3csx$logstaffnumberm[d0615iso3csx$logstaffnumberm=="-Inf"]<-NA
d0615iso3csx$logstaffnumbermstd<-c(scale(d0615iso3csx$logstaffnumberm))
d0615iso3csx$loggnim<-log10(d0615iso3csx$gnim)
d0615iso3csx$loggnimstd<-c(scale(d0615iso3csx$loggnim))
d0615iso3csx$logpopulationm<-log10(d0615iso3csx$populationm)
d0615iso3csx$logpopulationmstd<-c(scale(d0615iso3csx$logpopulationm))
d0615iso3csx$tertiaryenrollmentintmstd<-c(scale(d0615iso3csx$tertiaryenrollmentintm))
d0615iso3csx$logunlocalgsstaffiniso3m[d0615iso3csx$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d0615iso3csx$logunlocalgsstaffiniso3mstd<-c(scale(d0615iso3csx$logunlocalgsstaffiniso3m))
d0615iso3csx$politymstd<-c(scale(d0615iso3csx$politym))
d0615iso3csx$staffsharem<-d0615iso3csx$staffnumberm/sum(d0615iso3csx$staffnumberm,na.rm=T)
d0615iso3csx$logstaffsharem<-log10(d0615iso3csx$staffsharem)
d0615iso3csx$logstaffsharem[d0615iso3csx$logstaffsharem=="-Inf"]<-NA



####################################################################
####################################################################
#Aggregate for country-level
#For the entire period (1997-2015)
#Followed by creation of standardized variables

##################
#Aggregation - creating means of variables for use in panel regression models in Table 1
d9715iso3cs<-
  d9715iso3y %>%
  group_by(iso3) %>% 
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d9715iso3cs$logstaffnumberm<-log10(d9715iso3cs$staffnumberm+0.1)
d9715iso3cs$logunlocalgsstaffiniso3m<-log10(d9715iso3cs$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d9715iso3cs$logstaffnumberm[d9715iso3cs$logstaffnumberm=="-Inf"]<-NA
d9715iso3cs$logstaffnumbermstd<-c(scale(d9715iso3cs$logstaffnumberm))
d9715iso3cs$loggnim<-log10(d9715iso3cs$gnim)
d9715iso3cs$loggnimstd<-c(scale(d9715iso3cs$loggnim))
d9715iso3cs$logpopulationm<-log10(d9715iso3cs$populationm)
d9715iso3cs$logpopulationmstd<-c(scale(d9715iso3cs$logpopulationm))
d9715iso3cs$tertiaryenrollmentintmstd<-c(scale(d9715iso3cs$tertiaryenrollmentintm))
d9715iso3cs$logunlocalgsstaffiniso3m[d9715iso3cs$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d9715iso3cs$logunlocalgsstaffiniso3mstd<-c(scale(d9715iso3cs$logunlocalgsstaffiniso3m))
d9715iso3cs$politymstd<-c(scale(d9715iso3cs$politym))
d9715iso3cs$staffsharem<-d9715iso3cs$staffnumberm/sum(d9715iso3cs$staffnumberm,na.rm=T)
d9715iso3cs$logstaffsharem<-log10(d9715iso3cs$staffsharem)
d9715iso3cs$logstaffsharem[d9715iso3cs$logstaffsharem=="-Inf"]<-NA



####################################################################
####################################################################
#Construction of panel dataset for panel regression in Table 1
#construction of means and de-meaned variables, standardization (country-year level data)

##################
#turn data into panel data frame
pd9715iso3y<-pdata.frame(d9715iso3y, index = c("iso3", "year"))
#merge with cross-sectional (mean) data
pd9715<-merge(pd9715iso3y,d9715iso3cs,by=c("iso3"))

##################
#Variables construction: staff shares used in robustness models
pd9715$staffsharem<-pd9715$staffnumber/pd9715$staffnumber_igosy
pd9715$logstaffsharem<-log10(pd9715$staffsharem)
pd9715$logstaffsharem[pd9715$logstaffsharem=="-Inf"]<-NA
pd9715$logstaffsharemstd<-c(scale(pd9715$logstaffsharem))


##################
#Variables creation: de-meaned variables
pd9715$loggnidm<-pd9715$loggni-pd9715$loggnim
pd9715$logpopulationdm<-pd9715$logpopulation-pd9715$logpopulationm
pd9715$tertiaryenrollmentintdm<-pd9715$tertiaryenrollmentint-pd9715$tertiaryenrollmentintm
pd9715$logunlocalgsstaffiniso3dm<-pd9715$logunlocalgsstaffiniso3-pd9715$logunlocalgsstaffiniso3m
pd9715$politydm<-pd9715$polity-pd9715$politym

##################
#Variables creation: standardization
pd9715$logstaffnumberstd<-c(scale(pd9715$logstaffnumber))
pd9715$loggnidmstd<-c(scale(pd9715$loggnidm))
pd9715$loggnimstd<-c(scale(pd9715$loggnim))
pd9715$logpopulationdmstd<-c(scale(pd9715$logpopulationdm))
pd9715$logpopulationmstd<-c(scale(pd9715$logpopulationm))
pd9715$tertiaryenrollmentintdmstd<-c(scale(pd9715$tertiaryenrollmentintdm))
pd9715$tertiaryenrollmentintmstd<-c(scale(pd9715$tertiaryenrollmentintm))
pd9715$logunlocalgsstaffiniso3dmstd<-c(scale(pd9715$logunlocalgsstaffiniso3dm))
pd9715$logunlocalgsstaffiniso3mstd<-c(scale(pd9715$logunlocalgsstaffiniso3m))
pd9715$politydmstd<-c(scale(pd9715$politydm))
pd9715$politymstd<-c(scale(pd9715$politym))

##################
#Variables creation: further standardization
pd9715$loggnistd<-c(scale(pd9715$loggni))
pd9715$logpopulationstd<-c(scale(pd9715$logpopulation))
pd9715$tertiaryenrollmentintstd<-c(scale(pd9715$tertiaryenrollmentint))
pd9715$logunlocalgsstaffiniso3std<-c(scale(pd9715$logunlocalgsstaffiniso3))
pd9715$politystd<-c(scale(pd9715$polity))


####################################################################
####################################################################
#Aggregate for country-IO visibility level
#For the entire period (1997-2015)
#Followed by creation of standardized variables

##################
#Aggregation - creating means of variables across states and binary IO visibility measure
#x in d9715iso3csx stands for separate values (two observations) for highly visibly and lowly visible IO, for each country
d9715iso3csx<-
  d9715iso3yx %>%
  group_by(iso3,visible) %>% #,decade
  summarize(staffnumberm=mean(staffnumber, na.rm=T),
            unlocalgsstaffiniso3m=mean(unlocalgsstaffiniso3,na.rm=T),
            gnim=mean(gni,na.rm=T),
            unvoluntarybudgetarycontributionsm=mean(unvoluntarybudgetarycontributions,na.rm=T),
            populationm=mean(population,na.rm=T),
            tertiaryenrollmentintm=mean(tertiaryenrollmentint,na.rm=T),
            worldlanguagefirst_engm=mean(worldlanguagefirst_eng,na.rm=T),
            worldlanguagefirst_eng_frem=mean(worldlanguagefirst_eng_fre,na.rm=T),
            unp5m=mean(unp5,na.rm=T),
            politym=mean(polity,na.rm=T),
            oecdmember1994m=mean(oecdmember1994,na.rm=T))

##################
#Marginal additions to prevent loss of observations; substantively perfectly meaningful
d9715iso3csx$logstaffnumberm<-log10(d9715iso3csx$staffnumberm+0.1)
d9715iso3csx$logunlocalgsstaffiniso3m<-log10(d9715iso3csx$unlocalgsstaffiniso3m+0.1)

##################
#Variables creation: logs and standardization
d9715iso3csx$logstaffnumberm[d9715iso3csx$logstaffnumberm=="-Inf"]<-NA
d9715iso3csx$logstaffnumbermstd<-c(scale(d9715iso3csx$logstaffnumberm))
d9715iso3csx$loggnim<-log10(d9715iso3csx$gnim)
d9715iso3csx$loggnimstd<-c(scale(d9715iso3csx$loggnim))
d9715iso3csx$logunvoluntarybudgetarycontributionsm<-log10(d9715iso3csx$unvoluntarybudgetarycontributionsm)
d9715iso3csx$logunvoluntarybudgetarycontributionsm[d9715iso3csx$logunvoluntarybudgetarycontributionsm=="-Inf"]<-NA
d9715iso3csx$logunvoluntarybudgetarycontributionsmstd<-c(scale(d9715iso3csx$logunvoluntarybudgetarycontributionsm))
d9715iso3csx$logpopulationm<-log10(d9715iso3csx$populationm)
d9715iso3csx$logpopulationmstd<-c(scale(d9715iso3csx$logpopulationm))
d9715iso3csx$tertiaryenrollmentintmstd<-c(scale(d9715iso3csx$tertiaryenrollmentintm))
d9715iso3csx$logunlocalgsstaffiniso3m[d9715iso3csx$logunlocalgsstaffiniso3m=="-Inf"]<-NA
d9715iso3csx$logunlocalgsstaffiniso3mstd<-c(scale(d9715iso3csx$logunlocalgsstaffiniso3m))
d9715iso3csx$politymstd<-c(scale(d9715iso3csx$politym))



####################################################################
####################################################################
#Construction of panel dataset means and de-meaned variables, standardization (country-year-IO visibility level data)
#This dataset (pd9715x) is used when we analyze separately and compare highly and lowly visibly IOs, always by subsetting form this dataset one of the two groups of observations
#Otherwise, it is perfectly analogous to the main panel dataframe (pd9715)

##################
#turn data into panel data frame
pd9715iso3yx<-pdata.frame(d9715iso3yx, index = c("iso3", "year"))
pd9715x<-merge(pd9715iso3yx,d9715iso3csx,by=c("iso3","visible"))

##################
#Variables construction: staff shares used in robustness models
pd9715x$staffsharem<-pd9715x$staffnumber/pd9715x$staffnumber_vis_igosy
pd9715x$logstaffsharem<-log10(pd9715x$staffsharem)
pd9715x$logstaffsharem[pd9715x$logstaffsharem=="-Inf"]<-NA
pd9715x$logstaffsharemstd<-c(scale(pd9715x$logstaffsharem))

##################
#Variables creation: de-meaned variables
pd9715x$loggnidm<-pd9715x$loggni-pd9715x$loggnim
pd9715x$logpopulationdm<-pd9715x$logpopulation-pd9715x$logpopulationm
pd9715x$tertiaryenrollmentintdm<-pd9715x$tertiaryenrollmentint-pd9715x$tertiaryenrollmentintm
pd9715x$logunlocalgsstaffiniso3dm<-pd9715x$logunlocalgsstaffiniso3-pd9715x$logunlocalgsstaffiniso3m
pd9715x$politydm<-pd9715x$polity-pd9715x$politym
pd9715x$logunvoluntarybudgetarycontributionsdm<-pd9715x$logunvoluntarybudgetarycontributions-pd9715x$logunvoluntarybudgetarycontributionsm

##################
#Variables creation: standardization
pd9715x$logstaffnumberstd<-c(scale(pd9715x$logstaffnumber))
pd9715x$loggnidmstd<-c(scale(pd9715x$loggnidm))
pd9715x$loggnimstd<-c(scale(pd9715x$loggnim))
pd9715x$logpopulationdmstd<-c(scale(pd9715x$logpopulationdm))
pd9715x$logpopulationmstd<-c(scale(pd9715x$logpopulationm))
pd9715x$tertiaryenrollmentintdmstd<-c(scale(pd9715x$tertiaryenrollmentintdm))
pd9715x$tertiaryenrollmentintmstd<-c(scale(pd9715x$tertiaryenrollmentintm))
pd9715x$logunvoluntarybudgetarycontributionsdmstd<-c(scale(pd9715x$logunvoluntarybudgetarycontributionsdm))
pd9715x$logunvoluntarybudgetarycontributionsmstd<-c(scale(pd9715x$logunvoluntarybudgetarycontributionsm))
pd9715x$logunlocalgsstaffiniso3dmstd<-c(scale(pd9715x$logunlocalgsstaffiniso3dm))
pd9715x$logunlocalgsstaffiniso3mstd<-c(scale(pd9715x$logunlocalgsstaffiniso3m))
pd9715x$politydmstd<-c(scale(pd9715x$politydm))
pd9715x$politymstd<-c(scale(pd9715x$politym))

##################
#Variables creation: further standardization
pd9715x$loggnistd<-c(scale(pd9715x$loggni))
pd9715x$logpopulationstd<-c(scale(pd9715x$logpopulation))
pd9715x$tertiaryenrollmentintstd<-c(scale(pd9715x$tertiaryenrollmentint))
pd9715x$logunlocalgsstaffiniso3std<-c(scale(pd9715x$logunlocalgsstaffiniso3))
pd9715x$politystd<-c(scale(pd9715x$polity))






####################################################################
####################################################################
#Table 1 models

##################
#Model 1 (Table 1); OSL regression, period 1997-2005, no controls
reg_cs9705a<-lm(logstaffnumbermstd~
           loggnimstd+
           logpopulationmstd+
           tertiaryenrollmentintmstd+
           logunlocalgsstaffiniso3mstd
           ,data=d9705iso3cs, na.action=na.exclude) 
summary(reg_cs9705a)

##################
#Model 1 robust standard errors calculation (saved for export)
vcov_reg_cs9705a_formula <- vcovHC(reg_cs9705a, type="HC1")
coeftest(reg_cs9705a,vcov_reg_cs9705a_formula)
reg_cs9705arse <- sqrt(diag(vcov_reg_cs9705a_formula))
vif(reg_cs9705a)
bptest(reg_cs9705a)
dwtest(reg_cs9705a)


##################
#Model 2 (Table 1); OSL regression, period 1997-2005, controls included
reg_cs9705b<-lm(logstaffnumbermstd~
            loggnimstd+
            logpopulationmstd+
            tertiaryenrollmentintmstd+
            logunlocalgsstaffiniso3mstd+
            politym+
            unp5m+
            worldlanguagefirst_engm
            ,data=d9705iso3cs, na.action=na.exclude) 
summary(reg_cs9705b)

##################
#Model 2 robust standard errors calculation (saved for export)
vcov_reg_cs9705b_formula <- vcovHC(reg_cs9705b, type="HC1")
coeftest(reg_cs9705b,vcov_reg_cs9705b_formula)
reg_cs9705brse <- sqrt(diag(vcov_reg_cs9705b_formula))
vif(reg_cs9705b)
bptest(reg_cs9705b)
dwtest(reg_cs9705b)


##################
#Model 3 (Table 1); OSL regression, period 2006-2015, no controls
reg_cs0615a<-lm(logstaffnumbermstd~
           loggnimstd+
           logpopulationmstd+
           tertiaryenrollmentintmstd+
           logunlocalgsstaffiniso3mstd,
           data=d0615iso3cs, na.action=na.exclude) 
summary(reg_cs0615a)

##################
#Model 3 robust standard errors calculation (saved for export)
vcov_reg_cs0615a_formula <- vcovHC(reg_cs0615a, type="HC1")
coeftest(reg_cs0615a,vcov_reg_cs0615a_formula)
reg_cs0615arse <- sqrt(diag(vcov_reg_cs0615a_formula))

vif(reg_cs0615a)
bptest(reg_cs0615a)
dwtest(reg_cs0615a)


##################
#Model 4 (Table 1); OSL regression, period 2006-2015, controls included
reg_cs0615b<-lm(logstaffnumbermstd~
            loggnimstd+
            logpopulationmstd+
            tertiaryenrollmentintmstd+
            logunlocalgsstaffiniso3mstd+
            politym+
            unp5m+
            worldlanguagefirst_engm,
            data=d0615iso3cs, na.action=na.exclude) 
summary(reg_cs0615b)

##################
#Model 4 robust standard errors calculation (saved for export)
vcov_reg_cs0615b_formula <- vcovHC(reg_cs0615b, type="HC1")
coeftest(reg_cs0615b,vcov_reg_cs0615b_formula)
reg_cs0615brse <- sqrt(diag(vcov_reg_cs0615b_formula))
vif(reg_cs0615b)
bptest(reg_cs0615b)
dwtest(reg_cs0615b)



##################
#Model 5 (Table 1); panel regression, period 1997-2015, controls included
#variable ending "dm" signifies de-meaned scores, while ending "m" signifies mean scores (average across period 1997-2015 constructed above)
#create a dataframe with only the pertinent variables and no missing values, to allow for plot_model generation later on (avioding problems with missing values)
pd9715c<-pd9715%>%select(iso3,year,logstaffnumberstd,loggnidmstd,loggnimstd,logpopulationdmstd,
                         logpopulationmstd,tertiaryenrollmentintdmstd,tertiaryenrollmentintmstd,
                         logunlocalgsstaffiniso3dmstd,logunlocalgsstaffiniso3mstd,politydm,
                         politym,unp5m,worldlanguagefirst_engm,yearcount)
pd9715c<-pd9715c%>%drop_na()

preg_pd9715<-plm(logstaffnumberstd~
                   loggnidmstd+
                   loggnimstd+
                   logpopulationdmstd+
                   logpopulationmstd+
                   tertiaryenrollmentintdmstd+
                   tertiaryenrollmentintmstd+
                   logunlocalgsstaffiniso3dmstd+
                   logunlocalgsstaffiniso3mstd+
                   politydm+
                   politym+
                   unp5m+
                   worldlanguagefirst_engm+
                   yearcount+
                   yearcount*loggnimstd+
                   yearcount*logpopulationmstd,
                 data=pd9715c,index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715)

##################
#Model 5 clustered robust standard errors calculation (saved for export)
vcov_preg_pd9715_formula<-vcovHC(preg_pd9715,type="HC0",cluster="group")
coeftest(preg_pd9715, vcov=vcovHC(preg_pd9715,type="HC0",cluster="group"))
preg_pd9715rse <- sqrt(diag(vcov_preg_pd9715_formula))


##################
#Model 6 (Table 1); panel regression, period 1997-2015, controls included
#observations only for lowly visible IOs
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
preg_pd9715nv<-plm(logstaffnumberstd~
                loggnidmstd+
                loggnimstd+
                logpopulationdmstd+
                logpopulationmstd+
                tertiaryenrollmentintdmstd+
                tertiaryenrollmentintmstd+
                logunlocalgsstaffiniso3dmstd+
                logunlocalgsstaffiniso3mstd+
                politydm+
                politym+
                unp5m+
                worldlanguagefirst_engm+
                yearcount*loggnimstd+
                yearcount*logpopulationmstd,
                data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715nv)

##################
#Model 6 clustered robust standard errors calculation (saved for export)
vcov_preg_pd9715nv_formula<-vcovHC(preg_pd9715nv,type="HC0",cluster="group")
preg_pd9715nvrse <- sqrt(diag(vcov_preg_pd9715nv_formula))
coeftest(preg_pd9715nv, vcov=vcovHC(preg_pd9715nv,type="HC0",cluster="group"))
pbgtest(preg_pd9715nv)
pdwtest(preg_pd9715nv)



##################
#Model 7 (Table 1); panel regression, period 1997-2015, controls included
#observations only for highly visible IOs
pd9715xv<-subset(pd9715x,pd9715x$visible==1)
preg_pd9715v<-plm(logstaffnumberstd~
               loggnidmstd+
               loggnimstd+
               logpopulationdmstd+
               logpopulationmstd+
               tertiaryenrollmentintdmstd+
               tertiaryenrollmentintmstd+
               logunlocalgsstaffiniso3dmstd+
               logunlocalgsstaffiniso3mstd+
               politydm+
               politym+
               unp5m+
               worldlanguagefirst_engm+
               yearcount*loggnimstd+
               yearcount*logpopulationmstd,
               data=pd9715xv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715v)

##################
#Model 7 clustered robust standard errors calculation (saved for export)
vcov_preg_pd9715v_formula<-vcovHC(preg_pd9715v,type="HC0",cluster="group")
preg_pd9715vrse <- sqrt(diag(vcov_preg_pd9715v_formula))
coeftest(preg_pd9715v, vcov=vcovHC(preg_pd9715v,type="HC0",cluster="group"))
pbgtest(preg_pd9715v)
pdwtest(preg_pd9715v)


##################
#Table 1 export
regs_main<-stargazer(reg_cs9705a,reg_cs0615a,reg_cs9705b,reg_cs0615b,preg_pd9715,preg_pd9715nv,preg_pd9715v,
                         covariate.labels = c("GNI (log) (within)","GNI (log) (between)",
                                              "Population (log) (within)","Population (log) (between)",
                                              "University enrollment (within)","University enrollment (between)",
                                              "Local IO activity (log) (within)","Local IO activity (log) (between)",
                                              "Polity (within)","Polity (between)",
                                              "UN SC permanent seat",
                                              "English official lang.",
                                              "Year count (Yrc)",
                                              "GNI (log) (between)* Yrc",
                                              "Population (log) (between)* Yrc"),
                         dep.var.caption="",
                         dep.var.labels=c("Staff number (log) (std); OLS","Staff number (log)  (std); panel regression"),
                         model.names=F,
                         model.numbers = T,
                         column.labels=c("1997-2005","2006-2015","1997-2005","2006-2015","All IOs","Lowly visible IOs","Highly visible IOs"),
                         star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                         star.char = c("'", "*", "**","***"),
                         se=list(reg_cs9705arse,reg_cs0615arse,reg_cs9705brse,reg_cs0615brse,preg_pd9715rse,preg_pd9715nvrse,preg_pd9715vrse),
                         single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                         notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001",
                         notes.append = FALSE)

write.table(regs_main, file="t1_regs_main.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#Figure 2 constructions

##################
#Effects for GNI
plot_model_gni<-plot_model(preg_pd9715, type = "pred", terms = c("yearcount","loggnimstd  [-1,0,1]"), 
                           title="Decreasing effect of GNI",show.legend = F,
                           colors="bw")+
  ylim(-1,1.5)+
  ylab("Predicted staff number (log) (std)")+
  scale_x_continuous(name="", breaks=c(1,4,9,14,19), labels=c("1997","2000","2005","2010","2015"))+
  theme_classic(base_size = 12)
plot_model_gni

##################
#Effects for population size
plot_model_population<-plot_model(preg_pd9715, type = "pred", terms = c("yearcount","logpopulationmstd [-1,0,1]"),
                                  title="Increasing effect of population",show.legend = F,
                                  colors="bw")+
  ylim(-1,1.5)+
  ylab("Predicted staff number (log) (std)")+
  scale_x_continuous(name="", breaks=c(1,4,9,14,19), labels=c("1997","2000","2005","2010","2015"))+
  theme_classic(base_size = 12)
plot_model_population

##################
#Figure export
jpeg(filename="fig2_gni_population_interact_yrc2.jpg", width=6.5, height=4, units="in", res=300)  
grid.arrange(plot_model_gni,plot_model_population,nrow = 1)
dev.off()




####################################################################
####################################################################
#Table A8 models

##################
#
preg_pd9715_pool<-plm(logstaffnumberstd~
                        loggnistd+
                        logpopulationstd+
                        tertiaryenrollmentintstd+
                        logunlocalgsstaffiniso3std+
                        polity+
                        unp5+
                        worldlanguagefirst_eng+
                        yearcount*loggnistd+
                        yearcount*logpopulationstd,
                      data=pd9715,index=c("iso3", "year"), model="pool", na.action=na.exclude)
summary(preg_pd9715_pool)
vcov_preg_pd9715_pool_formula<-vcovHC(preg_pd9715_pool,type="HC0",cluster="group")
preg_pd9715_poolrse <- sqrt(diag(vcov_preg_pd9715_pool_formula))
coeftest(preg_pd9715_pool, vcov=vcovHC(preg_pd9715_pool,type="HC0",cluster="group"))


##################
#
preg_pd9715_poolnv<-plm(logstaffnumberstd~
                      loggnistd+
                      logpopulationstd+
                      tertiaryenrollmentintstd+
                      logunlocalgsstaffiniso3std+
                      polity+
                      unp5+
                      worldlanguagefirst_eng+
                      yearcount*loggnistd+
                      yearcount*logpopulationstd,
                      data=pd9715xnv, index=c("iso3", "year"), model="pool", na.action=na.exclude)
summary(preg_pd9715_poolnv)
vcov_preg_pd9715_poolnv_formula<-vcovHC(preg_pd9715_poolnv,type="HC0",cluster="group")
preg_pd9715_poolnvrse <- sqrt(diag(vcov_preg_pd9715_poolnv_formula))
coeftest(preg_pd9715_poolnv, vcov=vcovHC(preg_pd9715_poolnv,type="HC0",cluster="group"))


##################
#exposed, 1996-2015
preg_pd9715_poolv<-plm(logstaffnumberstd~
                     loggnistd+
                     logpopulationstd+
                     tertiaryenrollmentintstd+
                     logunlocalgsstaffiniso3std+
                     polity+
                     unp5+
                     worldlanguagefirst_eng+
                     yearcount*loggnistd+
                     yearcount*logpopulationstd,
                     data=pd9715xv, index=c("iso3", "year"), model="pool", na.action=na.exclude)
summary(preg_pd9715_poolv)
vcov_preg_pd9715_poolv_formula<-vcovHC(preg_pd9715_poolv,type="HC0",cluster="group")
preg_pd9715_poolvrse <- sqrt(diag(vcov_preg_pd9715_poolv_formula))
coeftest(preg_pd9715_poolv, vcov=vcovHC(preg_pd9715_poolv,type="HC0",cluster="group"))

dwtest(preg_pd9715_poolv)
pbgtest(preg_pd9715_poolv)
pdwtest(preg_pd9715_poolv)



##################
#
regs_ta8<-stargazer(preg_pd9715_pool, preg_pd9715_poolnv,preg_pd9715_poolv,
                    covariate.labels = c("GNI(log)",
                                         "Population(log)",
                                         "University enrollment",
                                         "Local IO activity(log)",
                                         "Polity",
                                         "UN SC permanent seat",
                                         "English official lang.",
                                         "Year count (Yrc)",
                                         "GNI(log) * Yrc",
                                         "Population(log) * Yrc"),
                    dep.var.labels=c("Staff number (log)(std)"),
                    column.labels=c("All IOs", "Lowly visible IOs","Highly visible IOs"),
                    star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                    star.char = c("'", "*", "**","***"),
                    se=list(preg_pd9715_poolrse, preg_pd9715_poolnvrse,preg_pd9715_poolvrse),
                    single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                    notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; country-clustered robust standard errors in brackets",
                    notes.append = FALSE)

write.table(regs_ta8, file="ta8_regs_pooled.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#Table A9 models

preg_pd9715_share<-plm(logstaffsharemstd~
                         loggnidmstd+
                         loggnimstd+
                         logpopulationdmstd+
                         logpopulationmstd+
                         tertiaryenrollmentintdmstd+
                         tertiaryenrollmentintmstd+
                         logunlocalgsstaffiniso3dmstd+
                         logunlocalgsstaffiniso3mstd+
                         politydm+
                         politym+
                         unp5m+
                         worldlanguagefirst_engm+
                         yearcount*loggnimstd+
                         yearcount*logpopulationmstd,
                       data=pd9715,index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715_share)
vcov_preg_pd9715_share_formula<-vcovHC(preg_pd9715_share,type="HC0",cluster="group")
preg_pd9715_sharerse <- sqrt(diag(vcov_preg_pd9715_share_formula))
coeftest(preg_pd9715_share, vcov=vcovHC(preg_pd9715_share,type="HC0",cluster="group"))


##################
#
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
preg_pd9715_sharenv<-plm(logstaffsharemstd~
                       loggnidmstd+
                       loggnimstd+
                       logpopulationdmstd+
                       logpopulationmstd+
                       tertiaryenrollmentintdmstd+
                       tertiaryenrollmentintmstd+
                       logunlocalgsstaffiniso3dmstd+
                       logunlocalgsstaffiniso3mstd+
                       politydm+
                       politym+
                       unp5m+
                       worldlanguagefirst_engm+
                       yearcount*loggnimstd+
                       yearcount*logpopulationmstd,
                       data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715_sharenv)
vcov_preg_pd9715_sharenv_formula<-vcovHC(preg_pd9715_sharenv,type="HC0",cluster="group")
preg_pd9715_sharenvrse <- sqrt(diag(vcov_preg_pd9715_sharenv_formula))
coeftest(preg_pd9715_sharenv, vcov=vcovHC(preg_pd9715_sharenv,type="HC0",cluster="group"))



##################
#
pd9715xv<-subset(pd9715x,pd9715x$visible==1)
preg_pd9715_sharev<-plm(logstaffsharemstd~
                      loggnidmstd+
                      loggnimstd+
                      logpopulationdmstd+
                      logpopulationmstd+
                      tertiaryenrollmentintdmstd+
                      tertiaryenrollmentintmstd+
                      logunlocalgsstaffiniso3dmstd+
                      logunlocalgsstaffiniso3mstd+
                      politydm+
                      politym+
                      unp5m+
                      worldlanguagefirst_engm+
                      yearcount*loggnimstd+
                      yearcount*logpopulationmstd,
                      data=pd9715xv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(preg_pd9715_sharev)
vcov_preg_pd9715_sharev_formula<-vcovHC(preg_pd9715_sharev,type="HC0",cluster="group")
preg_pd9715_sharevrse <- sqrt(diag(vcov_preg_pd9715_sharev_formula))
coeftest(preg_pd9715_sharev, vcov=vcovHC(preg_pd9715_sharev,type="HC0",cluster="group"))

dwtest(preg_pd9715_sharev)
pbgtest(preg_pd9715_sharev)
pdwtest(preg_pd9715_sharev)


##################
#
regs_ta9<-stargazer(preg_pd9715_share, preg_pd9715_sharenv,preg_pd9715_sharev,
                    covariate.labels = c("GNI (log) (within)","GNI (log) (between)",
                                         "Population (log) (within)","Population (log) (between)",
                                         "University enrollment (within)","University enrollment (between)",
                                         "Local IO activity (log) (within)","Local IO activity (log) (between)",
                                         "Polity (within)","Polity (between)",
                                         "UN SC permanent seat",
                                         "English official lang.",
                                         "Year count (Yrc)",
                                         "GNI (log) (between)* Yrc",
                                         "Population (log) (between)* Yrc"),
                    dep.var.labels=c("Staff share (%) (log)"),
                    column.labels=c("All IOs", "Lowly visible IOs","Highly visible IOs"),
                    star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                    star.char = c("'", "*", "**","***"),
                    se=list(preg_pd9715_sharerse, preg_pd9715_sharenvrse,preg_pd9715_sharevrse),
                    single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                    notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; country-clustered robust standard errors in brackets",
                    notes.append = FALSE)

write.table(regs_ta9, file="ta9_regs_percentshare_as_dv.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#Table A10 models

##################
#
d9705iso3csxnv<-subset(d9705iso3csx,d9705iso3csx$visible==0)
rega_cs9705nv<-lm(logstaffnumbermstd~
                     loggnimstd+
             logpopulationmstd+
             tertiaryenrollmentintmstd+
             logunlocalgsstaffiniso3mstd+
             unp5m+
             worldlanguagefirst_engm+
             politym,
             data=d9705iso3csxnv, na.action=na.exclude) 
summary(rega_cs9705nv)
vif(rega_cs9705nv)
nobs(rega_cs9705nv)
vcov_rega_cs9705nv_formula <- vcovHC(rega_cs9705nv, type="HC1")
coeftest(rega_cs9705nv,vcov_rega_cs9705nv_formula)
rega_cs9705nvrse <- sqrt(diag(vcov_rega_cs9705nv_formula))
bptest(rega_cs9705nv)
dwtest(rega_cs9705nv)


##################
#
d9705iso3csxnv<-subset(d9705iso3csx,d9705iso3csx$visible==1)
rega_cs9705v<-lm(logstaffnumbermstd~
            loggnimstd+
            logpopulationmstd+
            tertiaryenrollmentintmstd+
            logunlocalgsstaffiniso3mstd+
            unp5m+
            worldlanguagefirst_engm+
            politym,
            data=d9705iso3csxnv, na.action=na.exclude) 
summary(rega_cs9705v)
vif(rega_cs9705v)
nobs(rega_cs9705v)
vcov_rega_cs9705v_formula <- vcovHC(rega_cs9705v, type="HC1")
coeftest(rega_cs9705v,vcov_rega_cs9705v_formula)
rega_cs9705vrse <- sqrt(diag(vcov_rega_cs9705v_formula))
bptest(rega_cs9705v)
dwtest(rega_cs9705v)

##################
#
d0615iso3csxnv<-subset(d0615iso3csx,d0615iso3csx$visible==0)
rega_cs0615nv<-lm(logstaffnumbermstd~
             loggnimstd+
             logpopulationmstd+
             tertiaryenrollmentintmstd+
             logunlocalgsstaffiniso3mstd+
             unp5m+
             worldlanguagefirst_engm+
             politym,
             data=d0615iso3csxnv, na.action=na.exclude) 
summary(rega_cs0615nv)
vif(rega_cs0615nv)
nobs(rega_cs0615nv)
vcov_rega_cs0615nv_formula <- vcovHC(rega_cs0615nv, type="HC1")
coeftest(rega_cs0615nv,vcov_rega_cs0615nv_formula)
rega_cs0615nvrse <- sqrt(diag(vcov_rega_cs0615nv_formula))
bptest(rega_cs0615nv)
dwtest(rega_cs0615nv)


##################
#
d0615iso3csxv<-subset(d0615iso3csx,d0615iso3csx$visible==1)
rega_cs0615v<-lm(logstaffnumbermstd~
            loggnimstd+
            logpopulationmstd+
            tertiaryenrollmentintmstd+
            logunlocalgsstaffiniso3mstd+
            unp5m+
            worldlanguagefirst_engm+
            politym,
            data=d0615iso3csxv, na.action=na.exclude) 
summary(rega_cs0615v)
vif(rega_cs0615v)
nobs(rega_cs0615v)
vcov_rega_cs0615v_formula <- vcovHC(rega_cs0615v, type="HC1")
coeftest(rega_cs0615v,vcov_rega_cs0615v_formula)
rega_cs0615vrse <- sqrt(diag(vcov_rega_cs0615v_formula))
bptest(rega_cs0615v)
dwtest(rega_cs0615v)

##################
#
regs_ta10<-stargazer(rega_cs9705nv,rega_cs9705v,rega_cs0615nv,rega_cs0615v,
                    covariate.labels = c("GNI(log)",
                                         "Population(log)",
                                         "University enrollment",
                                         "Local IO activity(log)",
                                         "UN SC permanent seat",
                                         "English official lang.",
                                         "Polity"),
                    dep.var.labels=c("Staff number(log)(std)"),
                    column.labels=c("Lowly visible IOs, 1997-2005",
                                    "Highly visible IOs, 1997-2005",
                                    "Lowly visible IOs, 2006-2015",
                                    "Highly visible IOs, 2006-2015"),
                    star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                    star.char = c("'", "*", "**","***"),
                    se=list(rega_cs9705nvrse,rega_cs9705vrse,rega_cs0615nvrse,rega_cs0615vrse),
                    single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                    notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; robust standard errors in brackets",
                    notes.append = FALSE)

write.table(regs_ta10, file="ta10_regs_cross-sectional.html",col.names=F, row.names=FALSE,quote=FALSE)


####################################################################
####################################################################
#Table A11 models

##################
#from model 4 onwards in this Table, use Google as a measure of visibility
#to do that, return back to beginning of data, reload the main dataset and set d9715$visible<-d9715$visibleg at around line 70)
#after this analysis, for other models and Tables, go back to Factiva as a measure of visibility, reloading the data again
#take subset without countries with extremely large population and GNI
pd9715_ss<-subset(pd9715,(pd9715$iso3!="CHN" & pd9715$iso3!="USA" & pd9715$iso3!="IND"))

##################
#
prega_pd9715_ss<-plm(logstaffnumberstd~
                      loggnidmstd+
                      loggnimstd+
                      logpopulationdmstd+
                      logpopulationmstd+
                      tertiaryenrollmentintdmstd+
                      tertiaryenrollmentintmstd+
                      logunlocalgsstaffiniso3dmstd+
                      logunlocalgsstaffiniso3mstd+
                      politydm+
                      politym+
                      unp5m+
                      worldlanguagefirst_engm+
                      yearcount*loggnimstd+
                      yearcount*logpopulationmstd,
                     data=pd9715_ss,index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715_ss)
vcov_prega_pd9715_ss_formula<-vcovHC(prega_pd9715_ss,type="HC0",cluster="group")
prega_pd9715_ssrse <- sqrt(diag(vcov_prega_pd9715_ss_formula))
coeftest(prega_pd9715_ss, vcov=vcovHC(prega_pd9715_ss,type="HC0",cluster="group"))



##################
#
pd9715x_ss<-subset(pd9715x,(pd9715x$iso3!="CHN" & pd9715x$iso3!="USA" & pd9715x$iso3!="IND"))
pd9715xnv_ss<-subset(pd9715x_ss,pd9715x_ss$visible==0)
prega_pd9715nv_ss<-plm(logstaffnumberstd~
                    loggnidmstd+
                    loggnimstd+
                    logpopulationdmstd+
                    logpopulationmstd+
                    tertiaryenrollmentintdmstd+
                    tertiaryenrollmentintmstd+
                    logunlocalgsstaffiniso3dmstd+
                    logunlocalgsstaffiniso3mstd+
                    politydm+
                    politym+
                    unp5m+
                    worldlanguagefirst_engm+
                    yearcount*loggnimstd+
                    yearcount*logpopulationmstd,
                    data=pd9715xnv_ss, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715nv_ss)
vcov_prega_pd9715nv_ss_formula<-vcovHC(prega_pd9715nv_ss,type="HC0",cluster="group")
prega_pd9715nv_ssrse <- sqrt(diag(vcov_prega_pd9715nv_ss_formula))
coeftest(prega_pd9715nv_ss, vcov=vcovHC(prega_pd9715nv_ss,type="HC0",cluster="group"))


##################
#exposed, 1996-2015
pd9715xv_ss<-subset(pd9715x_ss,pd9715x_ss$visible==1)
prega_pd9715v_ss<-plm(logstaffnumberstd~
                   loggnidmstd+
                   loggnimstd+
                   logpopulationdmstd+
                   logpopulationmstd+
                   tertiaryenrollmentintdmstd+
                   tertiaryenrollmentintmstd+
                   logunlocalgsstaffiniso3dmstd+
                   logunlocalgsstaffiniso3mstd+
                   politydm+
                   politym+
                   unp5m+
                   worldlanguagefirst_engm+
                   yearcount*loggnimstd+
                   yearcount*logpopulationmstd,
                   data=pd9715xv_ss, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715v_ss)
vcov_prega_pd9715v_ss_formula<-vcovHC(prega_pd9715v_ss,type="HC0",cluster="group")
prega_pd9715v_ssrse <- sqrt(diag(vcov_prega_pd9715v_ss_formula))
coeftest(prega_pd9715v_ss, vcov=vcovHC(prega_pd9715v_ss,type="HC0",cluster="group"))
dwtest(prega_pd9715v_ss)
pbgtest(prega_pd9715v_ss)
pdwtest(prega_pd9715v_ss)


##################
#the following models, only run with Google, do not run again with FACTIVA
prega_pd9715_ggl<-plm(logstaffnumberstd~
                        loggnidmstd+
                        loggnimstd+
                        logpopulationdmstd+
                        logpopulationmstd+
                        tertiaryenrollmentintdmstd+
                        tertiaryenrollmentintmstd+
                        logunlocalgsstaffiniso3dmstd+
                        logunlocalgsstaffiniso3mstd+
                        politydm+
                        politym+
                        unp5m+
                        worldlanguagefirst_engm+
                        yearcount*loggnimstd+
                        yearcount*logpopulationmstd,
                      data=pd9715,index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715_ggl)
vcov_prega_pd9715_ggl_formula<-vcovHC(prega_pd9715_ggl,type="HC0",cluster="group")
prega_pd9715_gglrse <- sqrt(diag(vcov_prega_pd9715_ggl_formula))
coeftest(prega_pd9715_ggl, vcov=vcovHC(prega_pd9715_ggl,type="HC0",cluster="group"))


##################
#
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
prega_pd9715nv_ggl<-plm(logstaffnumberstd~
                     loggnidmstd+
                     loggnimstd+
                     logpopulationdmstd+
                     logpopulationmstd+
                     tertiaryenrollmentintdmstd+
                     tertiaryenrollmentintmstd+
                     logunlocalgsstaffiniso3dmstd+
                     logunlocalgsstaffiniso3mstd+
                     politydm+
                     politym+
                     unp5m+
                     worldlanguagefirst_engm+
                     yearcount*loggnimstd+
                     yearcount*logpopulationmstd,
                     data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715nv_ggl)
vcov_prega_pd9715nv_ggl_formula<-vcovHC(prega_pd9715nv_ggl,type="HC0",cluster="group")
prega_pd9715nv_gglrse <- sqrt(diag(vcov_prega_pd9715nv_ggl_formula))
coeftest(prega_pd9715nv_ggl, vcov=vcovHC(prega_pd9715nv_ggl,type="HC0",cluster="group"))


##################
#exposed, 1996-2015
pd9715xv<-subset(pd9715x,pd9715x$visible==1)
prega_pd9715v_ggl<-plm(logstaffnumberstd~
                    loggnidmstd+
                    loggnimstd+
                    logpopulationdmstd+
                    logpopulationmstd+
                    tertiaryenrollmentintdmstd+
                    tertiaryenrollmentintmstd+
                    logunlocalgsstaffiniso3dmstd+
                    logunlocalgsstaffiniso3mstd+
                    politydm+
                    politym+
                    unp5m+
                    worldlanguagefirst_engm+
                    yearcount*loggnimstd+
                    yearcount*logpopulationmstd,
                    data=pd9715xv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715v_ggl)
vcov_prega_pd9715v_ggl_formula<-vcovHC(prega_pd9715v_ggl,type="HC0",cluster="group")
prega_pd9715v_gglrse <- sqrt(diag(vcov_prega_pd9715v_ggl_formula))
coeftest(prega_pd9715v_ggl, vcov=vcovHC(prega_pd9715v_ggl,type="HC0",cluster="group"))
dwtest(prega_pd9715v_ggl)
pbgtest(prega_pd9715v_ggl)
pdwtest(prega_pd9715v_ggl)



##################
#
regs_ta11<-stargazer(prega_pd9715_ss,prega_pd9715nv_ss,prega_pd9715v_ss,prega_pd9715_ggl,prega_pd9715nv_ggl,prega_pd9715v_ggl,
                     covariate.labels = c("GNI(log) (within)","GNI(log) (between)",
                                          "Population(log) (within)","Population(log) (between)",
                                          "University enrollment (within)","University enrollment (between)",
                                          "Local IO activity(log) (within)","Local IO activity(log) (between)",
                                          "Polity (within)","Polity (between)",
                                          "UN SC permanent seat",
                                          "English official lang.",
                                          "Year count (Yrc)",
                                          "GNI(log) (between)* Yrc",
                                          "Population(log) (between)* Yrc"),
                     dep.var.labels=c("Staff number(log)(std)"),
                     column.labels=c("All IOs","Lowly visible IOs","Highly IOs","All IOs (Google)","Lowly visible IOs (Google)","Highly visible IOs (Google)"),
                     star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                     star.char = c("'", "*", "**","***"),
                     se=list(prega_pd9715_ssrse,prega_pd9715nv_ssrse,prega_pd9715v_ssrse,prega_pd9715_gglrse,prega_pd9715nv_gglrse,prega_pd9715v_gglrse),
                     single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                     notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; country-clustered (Models 1 and 4) or country-IO visibility-clustered (Models 2, 3, 5, 6) robust standard errors in brackets",
                     notes.append = FALSE)

#write.table(regs_ta11, file="ta11_regs_outliers_google.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#Table A12
#Six models, three pairs where we always use a different 'visibility' variable to distinguish highly and lowly visible IGOs
#to change the visibility variables, always return back to beginning of data, reload the main dataset and set d9715$visible to the correct variable around line 80)
#after this analysis, for other models and Tables, go back to Factiva as a measure of visibility, reloading the data again
#the variables for visibility are bigbudget (model 1 and 2), bigstaffriseabs (model 3 and 4), operational (model 5 and 6)


##################
#only run this with replacement of visibility variable with bigbudget
#small budget (bigbudget==0)
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
prega_pd9715nv_bbud<-plm(logstaffnumberstd~
                      loggnidmstd+
                      loggnimstd+
                      logpopulationdmstd+
                      logpopulationmstd+
                      tertiaryenrollmentintdmstd+
                      tertiaryenrollmentintmstd+
                      logunlocalgsstaffiniso3dmstd+
                      logunlocalgsstaffiniso3mstd+
                      politydm+
                      politym+
                      unp5m+
                      worldlanguagefirst_engm+
                      yearcount*loggnimstd+
                      yearcount*logpopulationmstd,
                      data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715nv_bbud)
vcov_prega_pd9715nv_bbud_formula<-vcovHC(prega_pd9715nv_bbud,type="HC0",cluster="group")
prega_pd9715nv_bbudrse <- sqrt(diag(vcov_prega_pd9715nv_bbud_formula))
coeftest(prega_pd9715nv_bbud, vcov=vcovHC(prega_pd9715nv_bbud,type="HC0",cluster="group"))


##################
#bigbudget==1
pd9715xe<-subset(pd9715x,pd9715x$visible==1)
prega_pd9715v_bbud<-plm(logstaffnumberstd~
                     loggnidmstd+
                     loggnimstd+
                     logpopulationdmstd+
                     logpopulationmstd+
                     tertiaryenrollmentintdmstd+
                     tertiaryenrollmentintmstd+
                     logunlocalgsstaffiniso3dmstd+
                     logunlocalgsstaffiniso3mstd+
                     politydm+
                     politym+
                     unp5m+
                     worldlanguagefirst_engm+
                     yearcount*loggnimstd+
                     yearcount*logpopulationmstd,
                     data=pd9715xe, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715v_bbud)
vcov_prega_pd9715v_bbud_formula<-vcovHC(prega_pd9715v_bbud,type="HC0",cluster="group")
prega_pd9715v_bbudrse <- sqrt(diag(vcov_prega_pd9715v_bbud_formula))
coeftest(prega_pd9715v_bbud, vcov=vcovHC(prega_pd9715v_bbud,type="HC0",cluster="group"))

dwtest(prega_pd9715v_bbud)
pbgtest(prega_pd9715v_bbud)
pdwtest(prega_pd9715v_bbud)



##################
#only run this with replacement of visibility variable with big staff absolute rise
#small rise of staff (bigstaffriseabs==0)
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
prega_pd9715nv_bsra<-plm(logstaffnumberstd~
                           loggnidmstd+
                           loggnimstd+
                           logpopulationdmstd+
                           logpopulationmstd+
                           tertiaryenrollmentintdmstd+
                           tertiaryenrollmentintmstd+
                           logunlocalgsstaffiniso3dmstd+
                           logunlocalgsstaffiniso3mstd+
                           politydm+
                           politym+
                           unp5m+
                           worldlanguagefirst_engm+
                           yearcount*loggnimstd+
                           yearcount*logpopulationmstd,
                         data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715nv_bsra)
vcov_prega_pd9715nv_bsra_formula<-vcovHC(prega_pd9715nv_bsra,type="HC0",cluster="group")
prega_pd9715nv_bsrarse <- sqrt(diag(vcov_prega_pd9715nv_bsra_formula))
coeftest(prega_pd9715nv_bsra, vcov=vcovHC(prega_pd9715nv_bsra,type="HC0",cluster="group"))


########################
#bigstaffriseabs==1
pd9715xe<-subset(pd9715x,pd9715x$visible==1)
prega_pd9715v_bsra<-plm(logstaffnumberstd~
                          loggnidmstd+
                          loggnimstd+
                          logpopulationdmstd+
                          logpopulationmstd+
                          tertiaryenrollmentintdmstd+
                          tertiaryenrollmentintmstd+
                          logunlocalgsstaffiniso3dmstd+
                          logunlocalgsstaffiniso3mstd+
                          politydm+
                          politym+
                          unp5m+
                          worldlanguagefirst_engm+
                          yearcount*loggnimstd+
                          yearcount*logpopulationmstd,
                        data=pd9715xe, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715v_bsra)
vcov_prega_pd9715v_bsra_formula<-vcovHC(prega_pd9715v_bsra,type="HC0",cluster="group")
prega_pd9715v_bsrarse <- sqrt(diag(vcov_prega_pd9715v_bsra_formula))
coeftest(prega_pd9715v_bsra, vcov=vcovHC(prega_pd9715v_bsra,type="HC0",cluster="group"))

dwtest(prega_pd9715v_bsra)
pbgtest(prega_pd9715v_bsra)
pdwtest(prega_pd9715v_bsra)



###################
#only run this with replacement of visibility variable with operational
#non-operational (opeational==0)
pd9715xnv<-subset(pd9715x,pd9715x$visible==0)
prega_pd9715nv_bope<-plm(logstaffnumberstd~
                           loggnidmstd+
                           loggnimstd+
                           logpopulationdmstd+
                           logpopulationmstd+
                           tertiaryenrollmentintdmstd+
                           tertiaryenrollmentintmstd+
                           logunlocalgsstaffiniso3dmstd+
                           logunlocalgsstaffiniso3mstd+
                           politydm+
                           politym+
                           unp5m+
                           worldlanguagefirst_engm+
                           yearcount*loggnimstd+
                           yearcount*logpopulationmstd,
                         data=pd9715xnv, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715nv_bope)
vcov_prega_pd9715nv_bope_formula<-vcovHC(prega_pd9715nv_bope,type="HC0",cluster="group")
prega_pd9715nv_boperse <- sqrt(diag(vcov_prega_pd9715nv_bope_formula))
coeftest(prega_pd9715nv_bope, vcov=vcovHC(prega_pd9715nv_bope,type="HC0",cluster="group"))


########################
#opeational==1
pd9715xe<-subset(pd9715x,pd9715x$visible==1)
prega_pd9715v_bope<-plm(logstaffnumberstd~
                          loggnidmstd+
                          loggnimstd+
                          logpopulationdmstd+
                          logpopulationmstd+
                          tertiaryenrollmentintdmstd+
                          tertiaryenrollmentintmstd+
                          logunlocalgsstaffiniso3dmstd+
                          logunlocalgsstaffiniso3mstd+
                          politydm+
                          politym+
                          unp5m+
                          worldlanguagefirst_engm+
                          yearcount*loggnimstd+
                          yearcount*logpopulationmstd,
                        data=pd9715xe, index=c("iso3", "year"), model="random", na.action=na.exclude)
summary(prega_pd9715v_bope)
vcov_prega_pd9715v_bope_formula<-vcovHC(prega_pd9715v_bope,type="HC0",cluster="group")
prega_pd9715v_boperse <- sqrt(diag(vcov_prega_pd9715v_bope_formula))
coeftest(prega_pd9715v_bope, vcov=vcovHC(prega_pd9715v_bope,type="HC0",cluster="group"))
dwtest(prega_pd9715v_bope)
pbgtest(prega_pd9715v_bope)
pdwtest(prega_pd9715v_bope)


########################
#
regs_ta12<-stargazer(prega_pd9715nv_bbud,prega_pd9715v_bbud,prega_pd9715nv_bsra,prega_pd9715v_bsra,prega_pd9715nv_bope,prega_pd9715v_bope,
                     covariate.labels = c("GNI(log) (within)","GNI(log) (between)",
                                          "Population(log) (within)","Population(log) (between)",
                                          "University enrollment (within)","University enrollment (between)",
                                          "Local IO activity(log) (within)","Local IO activity(log) (between)",
                                          "Polity (within)","Polity (between)",
                                          "UN SC permanent seat",
                                          "English official lang.",
                                          "Year count (Yrc)",
                                          "GNI(log) (between)* Yrc",
                                          "Population(log) (between)* Yrc"),
                     dep.var.labels=c("Staff number (log)(std)"),
                     column.labels=c("Small budget","Large budget","Low staff expansion","High staff expansion","Low operational activity","High operational activity"),
                     star.cutoffs = c(0.1,0.05, 0.01, 0.001),
                     star.char = c("'", "*", "**","***"),
                     se=list(prega_pd9715nv_bbudrse,prega_pd9715v_bbudrse,prega_pd9715nv_bsrarse,prega_pd9715v_bsrarse,prega_pd9715nv_boperse,prega_pd9715v_boperse),
                     single.row=T, no.space=T, omit.stat = c("f", "ser"), type="html",
                     notes = "'p<0.1; *p<0.05; **p<0.01; ***p<0.001; country-clustered robust standard errors in brackets",
                     notes.append = FALSE)


#write.table(regs_ta12, file="ta12_regs_visibility_size.html",col.names=F, row.names=FALSE,quote=FALSE)



####################################################################
####################################################################
#stationarity check

adf.test(pd9715x$logstaffnumber[!is.na(pd9715x$logstaffnumber)],k=1)
adf.test(pd9715x$loggnidmstd[!is.na(pd9715x$loggnidmstd)],k=1)
adf.test(pd9715x$loggnimstd[!is.na(pd9715x$loggnimstd)],k=1)

adf.test(pd9715x$logpopulationdmstd[!is.na(pd9715x$logpopulationdmstd)],k=1)
adf.test(pd9715x$logpopulationmstd[!is.na(pd9715x$logpopulationmstd)],k=1)

adf.test(pd9715x$tertiaryenrollmentintdmstd[!is.na(pd9715x$tertiaryenrollmentintdmstd)],k=1)
adf.test(pd9715x$tertiaryenrollmentintmstd[!is.na(pd9715x$tertiaryenrollmentintmstd)],k=1)

adf.test(pd9715x$logunlocalgsstaffiniso3dmstd[!is.na(pd9715x$logunlocalgsstaffiniso3dmstd)],k=1)
adf.test(pd9715x$logunlocalgsstaffiniso3mstd[!is.na(pd9715x$logunlocalgsstaffiniso3mstd)],k=1)
